# import packages
import pandas as pd
import sys

def aggregate_to_returns(kidlink):
    """Collapse the KIDLINK rows to one row per ``taxpayer_id``.

    Only rows with ``has_kids == 1`` are kept. Within each ``taxpayer_id``
    the columns ``total_pos_inc_class``, ``pb``, ``pnb``, ``has_dad`` and
    ``has_mom`` are averaged.

    Args:
        kidlink: DataFrame holding at least the columns named above plus
            ``has_kids`` and ``taxpayer_id``.

    Returns:
        DataFrame indexed by ``taxpayer_id`` with the per-group means.
    """
    with_kids = kidlink[kidlink.has_kids == 1]
    return with_kids.groupby('taxpayer_id').agg({
        'total_pos_inc_class': 'mean',
        'pb': 'mean',
        'pnb': 'mean',
        'has_dad': 'mean',
        'has_mom': 'mean',
    })


def missing_share(has_parent, weights=None):
    """Return one minus the (optionally weighted) mean of ``has_parent``.

    Args:
        has_parent: Series such as ``has_mom`` / ``has_dad``.
            NOTE(review): presumably 1 when the parent's SSN is present --
            confirm against the data dictionary.
        weights: optional Series aligned with ``has_parent`` (``pb`` or
            ``pnb`` in this script; presumably race probabilities -- TODO
            confirm). When omitted, the plain mean is used.

    Returns:
        ``1 - has_parent.mean()`` when ``weights`` is None, otherwise
        ``1 - (weights * has_parent).sum() / weights.sum()``.
    """
    if weights is None:
        return 1 - has_parent.mean()
    return 1 - (weights * has_parent).sum() / weights.sum()


def write_missing_parent_report(returns, out):
    """Write the Overall / Black / Non-Black missing-SSN rates to ``out``.

    Every rate is printed explicitly. A bare expression statement is only
    echoed by an interactive interpreter; when the file is run as a script
    its value is discarded, which left the report with labels but no
    numbers.

    Args:
        returns: return-level DataFrame from ``aggregate_to_returns``.
        out: writable text stream that receives the report.
    """
    # (heading, weight column); None means the unweighted overall rate
    sections = (
        ("Overall", None),
        ("\nBlack", 'pb'),
        ("\nNon-Black", 'pnb'),
    )
    for heading, weight_col in sections:
        weights = None if weight_col is None else returns[weight_col]
        print(heading, file=out)
        print("Missing Mother's SSN", file=out)
        print(missing_share(returns.has_mom, weights), file=out)
        print("Missing Father's SSN", file=out)
        print(missing_share(returns.has_dad, weights), file=out)


if __name__ == "__main__":
    # read in tom's data and aggregate to the return level
    tom_data = pd.read_csv('/REDACTED/HertzGraff/TY2014_KIDLINK.csv')
    tom_data_agged = aggregate_to_returns(tom_data)

    # write results out to a txt file; the with-block closes (and flushes)
    # the file even on error, and sys.stdout is never reassigned
    with open("/REDACTED/missing_parents_ssn.txt", "w") as report_file:
        write_missing_parent_report(tom_data_agged, report_file)